**********************************************************
/* Replication script for analyses in
	"Childhood Exposure to Co-Ethnics Increases Naturalization"

	Mathias Kruse, 2024 
*/
**********************************************************


* Set up
*	Start from a clean session so the script can be re-run in the same Stata
*	instance. Without frames reset, a second run would start inside the
*	analysis frame created further down, and the later frame copy would stop
*	with an error because that frame already exists.
	clear
	frames reset
	clear matrix
	clear mata
	set maxvar 120000 

	graph set window fontface default


* Load dataset (into the default frame)
	use  "E:\workdata\702992\702992\Mathias Kruse\Diversity - Childhood_Adulthood\Datasæt\Full_sample_pnas.dta", clear
		
		
* Set working directory (all figures are exported relative to this folder)
	cd "E:\workdata\702992\702992\Mathias Kruse\Diversity - Childhood_Adulthood\Immigrant integration\Results\pnas_replication" 
		
	
	
**********************

* Setting main analysis frame and defining main sample 

***********************

if 1==1{

* Work on a copy of the loaded data: all restrictions below are applied in the
* analysis frame, the default frame is left as loaded
frame copy default analysis
cwf analysis


* Sample restrictions

	* Remove:
		* (a) immigrants and descendants holding citizenship throughout the study period
			drop if cit_max==1 & cit_min==1

		* (b) immigrants and descendants naturalized before their first school-year observation
			drop if year_start_min>year_cit

		* (c) school-year observations that fall after naturalization
			drop if year_start>year_cit

		* (d) registration errors
			drop if year_reg_error==1
			drop if year_start>2007 & year_start_min==2006
			drop if imm_error==1

		* (e) immigrants and descendants with an EU background
		*     (tabulate first so the distribution, including missings, is in the log)
			tabulate eu_scheng, missing
			drop if eu_scheng!=0
		

		
* Covariate sets
*	Each global holds a list of regressors that the models below append to
*	their right-hand side.

	global covariates_missing ///
		i.gender_mis ///
		i.month_mis ///
		i.western_mis ///
		i.school_attend_n_mis

	global covariates_parents_missing ///
		avg_lninc ///
		i.m_edu_mis i.f_edu_mis ///
		i.m_cit_mis i.f_cit_mis ///
		i.m_work_mis i.f_work_mis ///
		i.m_single_mis i.f_single_mis ///
		i.f_western_mis ///
		i.registered

	global covariates_cohort_missing ///
		cohort_gender ///
		cohort_lninc2 ///
		cohort_edu ///
		cohort_nowork_mis

	global covariates_class_missing ///
		class_gender ///
		class_lninc2 ///
		class_edu ///
		class_nowork2
	
	
	
* Main analysis sample
*	The sample indicator shipped with the data is replaced by the estimation
*	sample of the baseline majority-concentration model.
	drop sample

	local controls i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local est_opts absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	reghdfe cit_change c.conc_maj `controls', `est_opts'
	gen sample = e(sample)

* Baseline estimates, stored for the coefficient plots further down
	* base: majority concentration
	reghdfe cit_change c.conc_maj `controls' if sample==1, `est_opts'
	eststo base

	* base2: co-ethnic concentration
	reghdfe cit_change c.conc_enclave `controls' if sample==1, `est_opts'
	eststo base2

* Standardized math and Danish scores (used as outcomes and predictors below)
	egen math_std = std(math_avg)
	egen dan_std = std(dan_avg)
	
}




**********************

* Main analyses
	// Code for reproducing Fig. 1 to Fig. 5 in the manuscript

**********************



if 1==1{


* FIGURE 1

	* Panel A: predicted outcome over majority concentration (quadratic specification)
		reghdfe cit_change c.conc_maj##c.conc_maj ///
			i.pnr_count i.year_start i.grade i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1, ///
			absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		margins, at(conc_maj=(0 (0.1) 1))
		marginsplot, ///
			title("") ///
			ytitle("Predicted probability" "of acquiring citizenship") ///
			xtitle("Ethnic majority concentration") ///
			scheme(plotplain) xlabel(, nogrid) ///
			recast(line) recastci(rline) ciopts(lpattern("--")) ///
			ylab(0 (0.10) 0.45) name("fig_1_panel_a", replace)
		graph export "Fig. 1_panel_a.pdf", replace

	* Panel B: distribution of family_dev2 in the analysis sample
		summarize family_dev2 if sample==1
		histogram family_dev2 if sample==1, ///
			xtitle("Deviations from school-family average") ///
			graphregion(color(white)) scheme(plotplain) ///
			xlab(, nogrid) xscale(range(-0.60 0.60)) xlabel(-0.60 (0.2) 0.60) ///
			fysiz(50) fxsize(70) ///
			name("fig_1_panel_b", replace) ylab(0 (10) 30)
		graph export "Fig. 1_panel_b.pdf", replace

	* Stack both panels into one figure
	graph combine fig_1_panel_a fig_1_panel_b, ///
		row(2) ysize(5.3) xsize(4.3) scale(0.90)
	graph export "Fig. 1_combined.pdf", replace



* FIGURE 2

	* Panel A: predicted outcome over co-ethnic concentration (linear specification)
		reghdfe cit_change c.conc_enclave ///
			i.pnr_count i.year_start i.grade i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1, ///
			absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		margins, at(conc_enclave=(0 (0.1) 1))
		marginsplot, ///
			title("") ///
			ytitle("Predicted probability" "of acquiring citizenship") ///
			xtitle("Co-ethnic concentration") ///
			scheme(plotplain) xlabel(, nogrid) ///
			recast(line) recastci(rline) ciopts(lpattern("--")) ///
			ylab(0 (0.10) 0.45) name("fig_2_panel_a", replace)
		graph export "Fig. 2_panel_a.pdf", replace

	* Panel B: distribution of family_dev_enclave in the analysis sample
		summarize family_dev_enclave if sample==1
		histogram family_dev_enclave if sample==1, ///
			xtitle("Deviations from school-family average") ///
			graphregion(color(white)) scheme(plotplain) ///
			xlab(, nogrid) xscale(range(-0.60 0.60)) xlabel(-0.60 (0.2) 0.60) ///
			name("fig_2_panel_b", replace) ///
			fysiz(50) fxsize(70)
		graph export "Fig. 2_panel_b.pdf", replace

	* Stack both panels into one figure
	graph combine fig_2_panel_a fig_2_panel_b, ///
		row(2) ysize(5.3) xsize(4.3) scale(0.90)
	graph export "Fig. 2_combined.pdf", replace

			
		
* FIGURE 3

	local rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local fe_vce absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	* Panel A (full population): estimated in the default frame, where the
	* sample restrictions of the analysis frame have not been applied
		frame change default

		egen math_std = std(math_avg)
		egen dan_std = std(dan_avg)

		local full_pop sample==1 & eu_scheng==0 | sample_dum2==1 & eu_scheng==0

		* Outer loop: majority (maj), then co-ethnic (enclave) concentration
		* Inner loop: standardized math, then standardized Danish
		foreach comp in maj enclave {
			foreach subj in math dan {
				reghdfe `subj'_std c.conc_`comp' `rhs' if `full_pop', `fe_vce'
				eststo `comp'_`subj'_full_noneu
			}
		}

		coefplot maj_math_full_noneu maj_dan_full_noneu enclave_math_full_noneu enclave_dan_full_noneu, ///
			vertical keep(conc_maj conc_enclave) yline(0) scheme(plotplain) ci(95 90) ///
			coeflabels(conc_maj="Majority concentration" conc_enclave="Co-ethnic concentration") ///
			legend(off) mlabel format(%9.2f) mlabposition(3) xline(1.5, lpattern(...)) ///
			ylab(-0.3 (0.1) 0.3) ylab(, format(%9.1f)) name("fig_3_panel_a", replace)
		graph export "Fig. 3_panel_a.pdf", replace

	* Panel B (main sample): same four models, estimated in the analysis frame
		frame change analysis

		foreach comp in maj enclave {
			foreach subj in math dan {
				reghdfe `subj'_std c.conc_`comp' `rhs' if sample==1, `fe_vce'
				eststo `comp'_`subj'_noneu
			}
		}

		coefplot maj_math_noneu maj_dan_noneu enclave_math_noneu enclave_dan_noneu, ///
			vertical keep(conc_maj conc_enclave) yline(0) ylab(-0.3 (0.1) 0.3) scheme(plotplain) ci(95 90) ///
			legend(order(3 "Math, std." 6 "Danish, std." 9 "Math, std." 12 "Danish, std.")) ///
			legend(pos(5) ring(0)) ///
			coeflabels(conc_maj="Majority concentration" conc_enclave="Co-ethnic concentration") ///
			mlabel format(%9.2f) mlabposition(3) xline(1.5, lpattern(...)) ///
			ylab(, format(%9.1f)) name("fig_3_panel_b", replace)
		graph export "Fig. 3_panel_b.pdf", replace

	* Place the two panels side by side
	graph combine fig_3_panel_a fig_3_panel_b, ysize(3) xsize(5.5)
	graph export "Fig. 3_combined.pdf", replace
	
	
	
* FIGURE 4

	* Timing indicator: 1 if year_cit is later than year_start_9th, 0 otherwise;
	* left missing when year_cit is missing
		gen cit_9th_dum = (year_cit>year_start_9th) if year_cit!=.

	* Estimation sample in all four models: 9th-grade observations from 2001
	* onwards for students who either naturalize after 9th grade
	* (cit_9th_dum==1) or never naturalize (cit_change==0)
	* NOTE(review): the two reghdfe models below specify no cluster or vce
	* option, unlike the other models in this file - confirm this is intended

	* Math
		regress cit_change math_std ///
			if sample==1 & grade==9 & cit_9th_dum==1 & year_start>=2001 ///
			| sample==1 & grade==9 & cit_change==0 & year_start>=2001, ///
			robust
		eststo math_biv
		reghdfe cit_change math_std c.conc_enclave i.year_start i.grade i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1 & grade==9 & cit_9th_dum==1 & year_start>=2001 ///
			| sample==1 & grade==9 & cit_change==0 & year_start>=2001, ///
			absorb(mor_idn#inst) keepsingletons
		eststo math_familyxschool

	* Danish
	* (the bivariate estimates are stored under the name dan_std, which is also
	* the name of the regressor)
		regress cit_change dan_std ///
			if sample==1 & grade==9 & cit_9th_dum==1 & year_start>=2001 ///
			| sample==1 & grade==9 & cit_change==0 & year_start>=2001, ///
			robust
		eststo dan_std
		* NOTE(review): only this model adds eu_scheng==0 to the second condition;
		* it looks redundant because the analysis frame was already restricted to
		* eu_scheng==0 - confirm
		reghdfe cit_change dan_std c.conc_enclave i.year_start i.grade i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1 & grade==9 & cit_9th_dum==1 & year_start>=2001 ///
			| sample==1 & grade==9 & cit_change==0 & eu_scheng==0 & year_start>=2001, ///
			absorb(mor_idn#inst) keepsingletons
		eststo dan_familyxschool

	coefplot math_biv math_familyxschool dan_std dan_familyxschool, ///
		vertical keep(math_std dan_std) yline(0) ylab(-0.05 (0.05) 0.10) scheme(plotplain) ci(95 90) ///
		coeflabels(math_std = "Math, std." dan_std = "Danish, std.") ///
		legend(order(3 "Bivariate" 6 "Family x School FE" 9 "Bivariate" 12 "Family x School FE")) ///
		mlabel format(%9.3f) mlabposition(3) ylab(, format(%9.2f)) ///
		xline(1.5, lpattern(...)) ysize(4) xsize(4.5) scale(0.90)

	graph export "Fig. 4.pdf", replace	


	
* FIGURE 5
*	Co-ethnic concentration interacted with school year; the plot shows the
*	marginal effect of conc_enclave separately for each value of year_start.

	reghdfe cit_change c.conc_enclave##i.year_start ///
		i.pnr_count i.grade i.birth_order ///
		$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
		if sample==1, ///
		absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	eststo coethnic_overtime
	margins, dydx(conc_enclave) over(year_start)
	marginsplot, ///
		title("") ///
		ytitle("", size(med) height(5)) ///
		xtitle("", size(med) height(5)) ///
		graphregion(color(white)) scheme(plotplain) legend(size(small)) ///
		xscale(range(1991 (1) 2018)) xlab(#34, angle(45)) yline(0) ///
		recast(line) recastci(rline) ciopts(lpattern("--")) ///
		ysize(3.5) xsize(4.80) scale(0.90) xlabel(, nogrid)

	graph export "Fig. 5.pdf", replace
	
	
}
	
	
	
	
**********************

* Analyses in the Supporting Information (SI)
	// Code for reproducing Fig. S1 to Fig. S28

**********************



if 1==1{


* Figure S1: Majority concentration
*	Kernel density of conc_maj in the analysis sample, drawn as a filled area
	kdensity conc_maj if sample==1, ///
		xlabel(0 (.1) 1) xticks(#11) recast(area) ///
		scheme(plotplain) title("") note("") ///
		xtitle(Ethnic majority concentration)
	graph export "Replicate_figS1_conc_maj.png", width(4000) replace

	

* Figure S2: Co-ethnic concentration 
*	Kernel density of conc_enclave in the analysis sample with a vertical
*	reference line at its 95th percentile.
	summ conc_enclave if sample==1, det
	* The percentile is taken from the summarize results rather than typed in
	* by hand: the previously hard-coded value differed from the 95th
	* percentile used for the same variable and sample in Figure S5.
	local enclave_p95 = r(p95)
	kdensity conc_enclave if sample==1, xlabel(0 (.1) 1) xticks(#11) recast(area)  scheme(plotplain) xline(`enclave_p95') text(12.5 0.35 "95th percentile", size(small)) title("") note("") xtitle(Co-ethnic concentration)
	graph export "Replicate_figS2_conc_enclave.png", width(4000) replace

	

* Figure S3: Citizenship over time 
*	Bar chart of the mean of cit_change by school year, using the rows with
*	pnr_count_analysis==1, plus a horizontal line at the overall mean.
	summ cit_change if pnr_count_analysis==1 & sample==1
	* Keep the overall mean now: tabulate below overwrites the r() results.
	* Using r(mean) instead of a typed-in number keeps the reference line
	* correct if the sample definition changes.
	local cit_share = r(mean)
	tab year_start cit_change if pnr_count_analysis==1 & sample==1
	graph bar cit_change if pnr_count_analysis==1 & sample==1, over(year_start, lab(angle(45))) yline(`cit_share', lcolor(black)) graphregion(color(white)) ytitle("Share who acquire citizenship", height(5)) scheme(plotplain) ylab(0 (0.20) 1) 
	graph export "Replicate_figS3_cit_over_time.png", width(4000) replace

	

* Figure S4: Bivariate_majority
*	Quadratic bivariate model without fixed effects or covariates; the
*	density of conc_maj is overlaid on a hidden second y-axis.
	regress cit_change c.conc_maj##c.conc_maj if sample==1, cluster(cohort_id)
	margins, at(conc_maj=(0 (0.1) 1))
	marginsplot, ///
		title("") ///
		ytitle("Probability of acquiring citizenship", size(med) height(5)) ///
		xtitle("Ethnic majority concentration", size(med) height(5)) ///
		scheme(plotplain) xlabel(, grid) ///
		recast(line) recastci(rline) ciopts(lpattern("--")) ///
		addplot(kdensity conc_maj if sample==1, recast(area)  scheme(plotplain) fcolor(%20) lcolor(%20) yaxis(2) yscale(off axis(2))   below legend(off)) ///
		name(g1, replace)
	graph export "Replicate_figS4_bivariate_maj.png", width(4000) replace

	

* Figure S5: Bivariate_coethnic
*	Quadratic bivariate model without fixed effects or covariates; the
*	density of conc_enclave is overlaid on a hidden second y-axis and the
*	95th and 99th percentiles of conc_enclave are marked with vertical lines.

	* Compute the percentiles from the data instead of hard-coding them (the
	* typed-in 95th percentile disagreed with the one used in Figure S2).
	* This has to run before margins, because marginsplot reads the r()
	* results that margins leaves behind.
	quietly summarize conc_enclave if sample==1, detail
	local enclave_p95 = r(p95)
	local enclave_p99 = r(p99)

	reg cit_change c.conc_enclave##c.conc_enclave if sample==1, cluster(cohort_id)
	margins, at(conc_enclave=(0 (0.1) 1))
	marginsplot, ///
		title("") ytitle("Probability of acquiring citizenship", size(med) height(5)) xtitle("Co-ethnic concentration", size(med) height(5)) ///
		scheme(plotplain) xlabel(, grid) ///
		recast(line) recastci(rline) ciopts(lpattern("--"))  ///
		addplot(kdensity conc_enclave if sample==1, recast(area)  scheme(plotplain) fcolor(%20) lcolor(%20) yaxis(2) yscale(off axis(2))   below legend(off))	xline(`enclave_p95' `enclave_p99', lpattern(solid) lcolor(%30)) text(0.44 0.32 "95th percentile" 0.44 0.75 "99th percentile", size(vsmall)) name(g2, replace)
	graph export "Replicate_figS5_bivariate_coethnic.png", width(4000) replace

	

* Figure S6: With / without covariates
*	For each concentration measure the baseline specification is compared
*	with the same model after removing the covariate globals.

	local covs $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local time_fe i.pnr_count  i.year_start i.grade i.birth_order
	local fe_vce absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	* Majority concentration
	reghdfe cit_change conc_maj `covs' `time_fe' if sample==1, `fe_vce'
	eststo m1_maj // Covariates + Time FE + Family FE x School FE
	reghdfe cit_change conc_maj `time_fe' if sample==1, `fe_vce'
	eststo m2_maj // Time FE + Family FE x School FE

	* Co-ethnic concentration
	reghdfe cit_change conc_enclave `covs' `time_fe' if sample==1, `fe_vce'
	eststo m3_coethnic // Covariates + Time FE + Family FE x School FE
	reghdfe cit_change conc_enclave `time_fe' if sample==1, `fe_vce'
	eststo m4_coethnic // Time FE + Family FE x School FE

	coefplot ///
		(m1_maj, label("Baseline")) ///
		(m2_maj, label("Without covariates")) ///
		(m3_coethnic, label("Baseline")) ///
		(m4_coethnic, label("Without covariates")), ///
		vertical yline(0) keep(conc_maj conc_enclave) scheme(plotplain) graphregion(color(white)) ///
		legend(size(small)) ci(95 90) xtitle("") mlabel format(%9.2g) ylab(-0.1 (0.05) 0.15) ///
		mlabposition(4) ytitle("", height(8)) ///
		coeflabels(conc_maj="Majority concentration" conc_enclave="Co-ethnic concentration") ///
		xline(1.5, lpattern(...))
	graph export "Replicate_figS6_without_cov.png", width(4000) replace

	

* Figure S7: Standardized effects
*	Standardized versions of the composition measures (computed over all
*	observations currently in the frame, not only sample==1)
	egen conc_maj_std = std(conc_maj)
	egen conc_enclave_std = std(conc_enclave)
	egen enclave_num_std = std(enclave_num)

	local spec $covariates_missing $covariates_parents_missing $covariates_cohort_missing i.pnr_count  i.year_start i.grade i.birth_order
	local fe_vce absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	* Each measure on its own, then both measures in the same model
	reghdfe cit_change c.conc_enclave_std `spec' if sample==1, `fe_vce'
	eststo m_enclave_full_std
	reghdfe cit_change c.conc_maj_std `spec' if sample==1, `fe_vce'
	eststo m_maj_std
	reghdfe cit_change c.conc_enclave_std c.conc_maj_std `spec' if sample==1, `fe_vce'
	eststo m_both_std

	* Majority concentration: alone vs. conditional on co-ethnic concentration
	coefplot ///
		(m_maj_std, label("Baseline")) ///
		(m_both_std, label("Controlling for" "co-ethnic concentration")), ///
		vertical  yline(0)  keep(conc_maj_std) scheme(plotplain) legend(size(small)) ci(95 90) ///
		xtitle("") xlabel("") ylabel(-0.01 (0.005) 0.015, grid) xscale(range(0.5 1.5)) ///
		mlabel format(%9.2g) mlabposition(4) name(combined_maj, replace)
	graph export "Replicate_figS7_std_maj.png", width(4000) replace

	* Co-ethnic concentration: alone vs. conditional on majority concentration
	coefplot ///
		(m_enclave_full_std, label("Baseline")) ///
		(m_both_std, label("Controlling for" "majority concentration")), ///
		vertical  yline(0)  keep(conc_enclave_std) scheme(plotplain) legend(size(small)) ci(95 90) ///
		xtitle("") xlabel("") ylabel(-0.01 (0.005) 0.015, grid) xscale(range(0.5 1.5)) ///
		mlabel format(%9.2g) mlabposition(4) name(combined_coethnic, replace)
	graph export "Replicate_figS7_std_coethnic.png", width(4000) replace
	

	
* Figure S8: Alternative model specifications
*	Seven variations of the baseline model, run once per concentration
*	measure. Stored estimates are named <variation>_<measure>.

	local time_fe i.pnr_count  i.year_start i.grade i.birth_order
	local covs $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local mother_fe absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	local father_fe absorb(far_idn#inst) keepsingletons cluster(cohort_id)

	foreach x in conc_maj conc_enclave{
		* No covariates
		reghdfe cit_change c.`x' `time_fe' if sample==1, `mother_fe'
		eststo nocov_`x'
		* Father id instead of mother id in the absorbed fixed effect
		reghdfe cit_change c.`x' `time_fe' `covs' if sample==1, `father_fe'
		eststo fatherfe_`x'
		* No extremes: drop conc_maj of exactly 0 or 1 and cohorts of 100 or more
		reghdfe cit_change c.`x' `time_fe' `covs' if sample==1 & conc_maj!=0 & conc_maj!=1 & cohort_size<100, `mother_fe'
		eststo noextreme_`x'
		* No discretion months: drop month 1 and month 12
		reghdfe cit_change c.`x' `time_fe' `covs' if sample==1 & month!=1 & month!=12, `mother_fe'
		eststo nodiscretion_`x'
		* Drop school movers
		reghdfe cit_change c.`x' `time_fe' `covs'  if sample==1 & school_attend_max==1, `mother_fe'
		eststo nomovers_`x'
		* Drop parents with citizenship
		reghdfe cit_change c.`x' `time_fe' `covs' if sample==1 & m_cit==0 & f_cit==0, `mother_fe'
		eststo noparentcit_`x'
		* Only those with siblings
		reghdfe cit_change c.`x' `time_fe' `covs' if sample==1 & sibling>1, `mother_fe'
		eststo onlysib_`x'
		}

	* Majority concentration
	coefplot ///
		(base, label("Baseline")) ///
		(nocov_conc_maj, label("No covariates")) ///
		(fatherfe_conc_maj, label("Father FE")) ///
		(noextreme_conc_maj, label("Exclude extreme observations")) ///
		(nodiscretion_conc_maj, label("Exclude discretion months")) ///
		(nomovers_conc_maj, label("Exclude school movers")) ///
		(noparentcit_conc_maj, label("Exclude parents with citizenship")) ///
		(onlysib_conc_maj, label("Only siblings")), ///
		vertical yline(0) keep(conc_maj) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) ///
		xtitle("") xlabel("") ylabel(-0.1 (0.05) 0.05) mlabel format(%9.2g) mlabposition(4) name(g3, replace)
	graph export "Replicate_figS8_alt_model_maj.png", width(4000) replace

	* Co-ethnic concentration (reuses the graph name g3, replacing the plot above in memory)
	coefplot ///
		(base2, label("Baseline")) ///
		(nocov_conc_enclave, label("No covariates")) ///
		(fatherfe_conc_enclave, label("Father FE")) ///
		(noextreme_conc_enclave, label("Exclude extreme observations")) ///
		(nodiscretion_conc_enclave, label("Exclude discretion months")) ///
		(nomovers_conc_enclave, label("Exclude school movers")) ///
		(noparentcit_conc_enclave, label("Exclude parents with citizenship")) ///
		(onlysib_conc_enclave, label("Only siblings")), ///
		vertical yline(0, lpattern(--)) keep(conc_enclave) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) ///
		xtitle("") xlabel("") ylabel(-0.05 (0.05) 0.15) mlabel format(%9.2g) mlabposition(2) name(g3, replace)
	graph export "Replicate_figS8_alt_model_coethnic.png", width(4000) replace

	

* Figure S9: Different types of measures of the ethnic composition 

	* Estimation sample for the classroom-level models (sample2)
	reghdfe cit_change conc_maj_class $covariates_missing $covariates_parents_missing $covariates_class_missing i.pnr_count2 i.year_start i.grade i.birth_order, absorb(mor_idn#inst) keepsingletons cluster(class_id)
	gen sample2=e(sample)

	* Highest pnr_count_analysis per person, used to keep one row per individual
	sort pnr_count_analysis
	bys pnr: egen pnr_max = max(pnr_count_analysis)

	* Majority concentration 
	reghdfe cit_change c.family_dev2 i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	eststo res_var_maj // Using the calculated residual variation ("family_dev2") as main IV
	reghdfe cit_change c.conc_avg i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	eststo cohort_avg_maj // Cohort average (full sample)
	reghdfe cit_change c.conc_maj $covariates_missing $covariates_parents_missing $covariates_cohort_missing i.year_start i.birth_order i.pnr_count i.grade if sample==1 & pnr_max==pnr_count_analysis, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	 eststo one_obs_maj // Only one observation per individual (pnr_max)
	reghdfe cit_change conc_maj_class i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_class_missing if sample2==1, absorb(mor_idn#inst) keepsingletons cluster(class_id)
	eststo class_maj  // X = Majority concentration, classroom 

	* Co-ethnic concentration
	reghdfe cit_change c.family_dev_enclave i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	eststo res_var_coethnic // Using the calculated residual variation ("family_dev_enclave") as main IV
	reghdfe cit_change c.conc_avg_enclave i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	eststo cohort_avg_coethnic // Cohort average (full sample)
	reghdfe cit_change c.conc_enclave $covariates_missing $covariates_parents_missing $covariates_cohort_missing i.year_start i.birth_order i.pnr_count i.grade if sample==1 & pnr_max==pnr_count_analysis, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
	 eststo one_obs_coethnic // Only one observation per individual (pnr_max)
	reghdfe cit_change conc_enclave_class i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_class_missing if sample2==1, absorb(mor_idn#inst) keepsingletons cluster(class_id)
	eststo class_coethnic  // X = Coethnic concentration, classroom 

	* Majority concentration plot
	coefplot (base, label("Baseline")) (one_obs_maj, label("One observation per individual")) (res_var_maj, label("Residual variation")) (cohort_avg_maj, label("Average majority concentration")) (class_maj, label("Majority concentration, classroom")), vertical yline(0) keep(conc_maj family_dev2 conc_maj_cit conc_maj_class conc_maj_cit_class conc_avg) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) xtitle("") xlabel("") ylabel(-0.1 (0.05) 0.05) mlabel format(%9.2g) mlabposition(9) xscale(range(0 5)) name(g2, replace) 
	graph export "Replicate_figS9_alt_measure_maj.png", width(4000) replace

	* Co-ethnic concentration plot
	* keep() now also lists conc_enclave_class, the regressor name used in the
	* class_coethnic model above; before, only conc_enclave_class2 was listed,
	* so the classroom estimate could be left out of the figure. The old names
	* stay in the list, which is harmless if they match nothing.
	coefplot (base2, label("Baseline")) (one_obs_coethnic, label("One observation per individual")) (res_var_coethnic, label("Residual variation"))  (cohort_avg_coethnic, label("Average co-ethnic concentration")) (class_coethnic, label("Co-ethnic concentration, classroom")), keep(conc_enclave family_dev_enclave conc_avg_enclave conc_enclave_class conc_enclave_class2 conc_enclave_cit2) vertical yline(0) scheme(plotplain) graphregion(color(white)) ci(95 90) xtitle("") xlabel("") ylabel(-0.05 (0.05) 0.15) mlabel format(%9.2g) mlabposition(7) name(g2, replace) xscale(range(0 5))
	graph export "Replicate_figS9_alt_measure_coethnic.png", width(4000) replace


	
* Figure S10: Effect once other composition measures are included
*	The baseline model is re-estimated with additional composition controls:
*	ed, enclave_num, and both together with cohort_size.

	local base_rhs i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local fe_vce absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	foreach x in conc_enclave conc_maj{

		* + ethnic diversity
			reghdfe cit_change c.`x' `base_rhs' ed ///
				if sample==1 & eu_scheng==0, `fe_vce'
			eststo robust_ed_`x'

		* + size of the ethnic group
			reghdfe cit_change c.`x' `base_rhs' enclave_num ///
				if sample==1 & eu_scheng==0, `fe_vce'
			eststo robust_size_`x'

		* + ethnic diversity, size of the ethnic group and size of the cohort
			reghdfe cit_change c.`x' `base_rhs' ed enclave_num cohort_size ///
				if sample==1 & eu_scheng==0, `fe_vce'
			eststo robust_ed_size_`x'
		}

	* Co-ethnic concentration
	coefplot ///
		(base2, label("Baseline")) ///
		(robust_ed_conc_enclave, label("Incl. ethnic diversity")) ///
		(robust_size_conc_enclave, label("Incl. group size")) ///
		(robust_ed_size_conc_enclave, label("Incl. diversity and group size")), ///
		vertical yline(0) keep(conc_enclave) scheme(plotplain) graphregion(color(white)) ///
		legend(size(small)) ci(95 90) xtitle("") ylabel(-0.05 (0.05) 0.15) xlabel("") ///
		mlabel format(%9.2g) mlabposition(2) name(g1, replace)
	graph export "Replicate_figS10_diversity_coethnic.png", width(4000) replace


	* Majority concentration
	coefplot ///
		(base, label("Baseline")) ///
		(robust_ed_conc_maj, label("Incl. ethnic diversity")) ///
		(robust_size_conc_maj, label("Incl. group size")) ///
		(robust_ed_size_conc_maj, label("Incl. diversity and group size")), ///
		vertical yline(0) keep(conc_maj) scheme(plotplain) graphregion(color(white)) ///
		legend(size(small)) ci(95 90) xtitle("") ylabel(-0.1 (0.05) 0.20) xlabel("") ///
		mlabel format(%9.2g) mlabposition(2) name(g1, replace)
	graph export "Replicate_figS10_diversity_maj.png", width(4000) replace


	
* Figure S11: Standard error clustering 
*	Per concentration measure: cluster on the individual (pnr), two-way
*	cluster on individual and cohort, and the baseline clustering without
*	the keepsingletons option.
	foreach var in conc_maj conc_enclave{
		* Clustered on the individual
		reghdfe cit_change c.`var' i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(pnr)
		eststo cluster_pnr_`var'
		* Two-way clustering: individual and cohort
		reghdfe cit_change c.`var' i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) keepsingletons cluster(pnr cohort_id)
		eststo cluster_pnr_co_`var'
		* Baseline clustering, but without keepsingletons
		reghdfe cit_change c.`var' i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing if sample==1, absorb(mor_idn#inst) cluster(cohort_id)
		eststo nosingleton_`var'
		}


	* The legend entry for nosingleton_conc_enclave is labelled [coethnic];
	* it used to carry the [majority] tag of the model it was copied from.
	coefplot (base, label("Baseline [majority]")) (nosingleton_conc_maj, label("Drop singleton observations [majority]")) (cluster_pnr_conc_maj, label("SE, individual [majority]")) (cluster_pnr_co_conc_maj, label("SE, individual & cohort [majority]")) (base2, label("Baseline [coethnic]")) (nosingleton_conc_enclave, label("Drop singleton observations [coethnic]")) (cluster_pnr_conc_enclave, label("SE, individual [coethnic]")) (cluster_pnr_co_conc_enclave, label("SE, individual & cohort [coethnic]")), vertical yline(0) keep(conc_maj conc_enclave) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) xtitle("") xlabel("") ylabel(-0.1 (0.05) 0.15) xscale(range(0 3))
	graph export "Replicate_figS11_SE_clustering_singletons.png", width(4000) replace

	
	
* Figure S12: Age citizenship acquired
*	Ages 40 to 45 are collapsed into one top category labelled 40+.
*	NOTE(review): values of age_cit above 45 are not recoded - confirm none exist.
	recode age_cit(40/45=40 "40+"), gen(age_cit_comp)

	* Descriptives for the rows with pnr_count2==1 in the analysis sample
	summarize age_cit_comp if pnr_count2==1 & sample==1, detail
	tabulate age_cit_comp if pnr_count2==1 & sample==1

	* Percent histogram with a reference line at age 18
	histogram age_cit_comp if pnr_count2==1 & sample==1, ///
		xline(18, lcolor(black %40)) text(12 15 "Age = 18", size(small)) ///
		graphregion(color(white)) xtitle("Age citizenship acquired", height(5)) ///
		scheme(plotplain) xscale(range(5(1)40)) xlabel(5(5)40) ///
		legend(off) title("") note("") ///
		xlabel(5(1)40, labsize(small)) percent ylab(0 (5) 15)
	graph export "Replicate_figS12_age_cit.png", width(4000) replace 

	

* Figure S13: Short vs. longer-term effects
*	Baseline model on four subsamples: below 18 / 18 or older in 2019, and
*	school years before 2007 / before 2000.
	generate age_2019 = 2019-born

	local rhs i.pnr_count  i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing
	local fe_vce absorb(mor_idn#inst) keepsingletons cluster(cohort_id)

	foreach x in conc_maj conc_enclave{
		* Younger than 18 in 2019
		reghdfe cit_change `x' `rhs' if sample==1 & age_2019<18, `fe_vce'
		eststo restr_age17_`x'

		* 18 or older in 2019
		reghdfe cit_change `x' `rhs' if sample==1 & age_2019>17, `fe_vce'
		eststo restr_age18_`x'

		* School years before 2007
		reghdfe cit_change c.`x' `rhs' if sample==1 & year_start<2007, `fe_vce'
		eststo restr_91_06_`x'

		* School years before 2000
		reghdfe cit_change c.`x' `rhs' if sample==1 & year_start<2000, `fe_vce'
		eststo restr_91_99_`x'
		}


	* Co-ethnic concentration
	coefplot ///
		(base2, label("Baseline")) ///
		(restr_age17_conc_enclave, label("Age < 18 in 2019")) ///
		(restr_age18_conc_enclave, label("Age 18+ in 2019")) ///
		(restr_91_06_conc_enclave, label("School years: 1991-2006")) ///
		(restr_91_99_conc_enclave, label("School years: 1991-1999")), ///
		vertical yline(0) keep(conc_enclave) scheme(plotplain) graphregion(color(white)) ///
		legend(size(small)) ci(95 90) xtitle("") ylabel(-0.05 (0.05) 0.30) xlabel("") ///
		mlabel format(%9.2g) mlabposition(2) name(g1, replace)

	graph export "Replicate_figS13_longterm_coethnic.png", width(4000) replace


	* Majority concentration
	coefplot ///
		(base, label("Baseline")) ///
		(restr_age17_conc_maj, label("Age < 18 in 2019")) ///
		(restr_age18_conc_maj, label("Age 18+ in 2019")) ///
		(restr_91_06_conc_maj, label("School years: 1991-2006")) ///
		(restr_91_99_conc_maj, label("School years: 1991-1999")), ///
		vertical yline(0) keep(conc_maj conc_enclave) scheme(plotplain) graphregion(color(white)) ///
		legend(size(small)) ci(95 90) xtitle("") ylabel(-0.1 (0.05) 0.20) xlabel("") ///
		mlabel format(%9.2g) mlabposition(2) name(g1, replace)

	graph export "Replicate_figS13_longterm_maj.png", width(4000) replace

	
	
* Figure S14: Short-term effects
	* The outcome statsb_ is regressed on each concentration measure twice:
	* once absorbing student x school cells and once absorbing family x school cells.
	foreach conc in conc_enclave conc_maj {

		* Student x School FE
		reghdfe statsb_ c.`conc' i.pnr_count i.year_start i.grade ///
			$covariates_parents_missing $covariates_cohort_missing ///
			if sample==1, absorb(pnr_n#inst) keepsingletons vce(cluster cohort_id)
		eststo sh_term_stufe_`conc'

		* Family x School FE
		reghdfe statsb_ `conc' i.pnr_count i.year_start i.grade i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		eststo sh_term_ffe_`conc'
	}

	* Co-ethnic concentration panel
	coefplot ///
		(base2, label("Baseline")) ///
		(sh_term_stufe_conc_enclave, label("Short term, student x school FE")) ///
		(sh_term_ffe_conc_enclave, label("Short term, family x school FE")), ///
		vertical yline(0) keep(conc_enclave) ///
		scheme(plotplain) graphregion(color(white)) legend(size(small)) ///
		ci(95 90) xtitle("") ylabel(-0.05 (0.05) 0.15) xlabel("") ///
		mlabel format(%9.2g) mlabposition(2) name(g1, replace)

	graph export "Replicate_figS14_shortterm_coethnic.png", width(4000) replace

	* Majority concentration panel
	coefplot ///
		(base, label("Baseline")) ///
		(sh_term_stufe_conc_maj, label("Short term, student x school FE")) ///
		(sh_term_ffe_conc_maj, label("Short term, family x school FE")), ///
		vertical yline(0) keep(conc_maj) ///
		scheme(plotplain) graphregion(color(white)) legend(size(small)) ///
		ci(95 90) xtitle("") ylabel(-0.1 (0.05) 0.05) xlabel("") ///
		mlabel format(%9.2g) mlabposition(3) name(g1, replace)

	graph export "Replicate_figS14_shortterm_maj.png", width(4000) replace

	
	
* Figure S15: Short and long-term (before/after 18 y/o)

	* DV capturing whether people got citizenship before/after 18 y/o
	* Stata orders missing values above every number, so a bare "age_cit>17" is also true
	* when age_cit is missing. The age comparisons below therefore exclude missing ages
	* explicitly. Without that guard a row with missing cit_change and missing age_cit
	* would be coded 0 on cit_bef_18 and 1 on cit_after_18.
	* Resulting coding for both dummies:
	*   cit_change==0                      -> 0
	*   naturalized with a known age_cit   -> 1 or 0 according to the age
	*   anything else                      -> missing
		gen cit_bef_18 = 1 if age_cit<18
		replace cit_bef_18 = 0 if cit_change==0
		replace cit_bef_18 = 0 if age_cit>17 & !missing(age_cit)
		replace cit_bef_18 = . if cit_change==1 & missing(age_cit)

		gen cit_after_18 = 1 if age_cit>17 & !missing(age_cit)
		replace cit_after_18 = 0 if cit_change==0
		replace cit_after_18 = 0 if age_cit<18
		replace cit_after_18 = . if cit_change==1 & missing(age_cit)

	* Analyses
	* All models are restricted to school years before 2007 and use family x school FE.
		local s15_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

		* Baselines on the same restricted period, with cit_change as outcome
		reghdfe cit_change c.conc_enclave `s15_rhs' if sample==1 & year_start<2007, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		eststo base_coethnic_91_06

		reghdfe cit_change c.conc_maj `s15_rhs' if sample==1 & year_start<2007, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		eststo base_maj_91_06

		* Same specifications with the before-18 and after-18 dummies as outcomes
		foreach var in cit_bef_18 cit_after_18 {
			reghdfe `var' c.conc_enclave `s15_rhs' if sample==1 & year_start<2007, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
			eststo `var'_enclave
			reghdfe `var' c.conc_maj `s15_rhs' if sample==1 & year_start<2007, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
			eststo `var'_maj
			}

		coefplot (base_coethnic_91_06, label("Baseline")) ///
			(cit_bef_18_enclave, label("Before 18")) ///
			(cit_after_18_enclave, label("After 18")), ///
			vertical yline(0) keep(conc_enclave) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) xtitle("") ylabel(-0.1 (0.05) 0.20) xlabel("") mlabel format(%9.2g) mlabposition(2) name(g1, replace)
		graph export "Replicate_figS15_longterm_2_coethnic.png", width(4000) replace


		coefplot (base_maj_91_06, label("Baseline")) ///
			(cit_bef_18_maj, label("Before 18")) ///
			(cit_after_18_maj, label("After 18")), ///
			vertical yline(0) keep(conc_maj) scheme(plotplain) graphregion(color(white)) legend(size(small)) ci(95 90) xtitle("") ylabel(-0.1 (0.05) 0.20) xlabel("") mlabel format(%9.2g) mlabposition(2) name(g1, replace)
		graph export "Replicate_figS15_longterm_2_maj.png", width(4000) replace

		

* Figure S16: Heterogenous effects, gender
	* For each concentration measure: interact it with gender, post and store the
	* gender-specific marginal effects, then export the marginsplot.
	foreach conc in conc_maj conc_enclave {

		* Suffix of the stored estimate (maj, enclave) and of the exported file (maj, coethnic)
		local est_sfx = subinstr("`conc'", "conc_", "", 1)
		local file_sfx = cond("`conc'" == "conc_maj", "maj", "coethnic")

		reghdfe cit_change c.`conc'##i.gender i.pnr_count ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			i.grade i.year_start i.birth_order ///
			if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(`conc') over(gender) post
		eststo gender_`est_sfx'

		marginsplot, ///
			title("") xtitle("", size(med) height(5)) ///
			graphregion(color(white)) scheme(plotplain) legend(size(small)) ///
			yline(0) ylab(-0.1 (0.05) 0.15) ytitle("") ///
			recast(scatter) xscale(range(-0.5 1.5)) level(95)
		graph export "Replicate_figS16_hetero_gender_`file_sfx'.png", width(4000) replace
	}

	
	
* Figure S17: Heterogenous effects, immigrant status
	* Marginal effect of each concentration measure by category of imm,
	* plotted straight from margins (nothing is stored).
	foreach conc in conc_maj conc_enclave {

		* File suffix: maj for majority concentration, coethnic for co-ethnic concentration
		local file_sfx = cond("`conc'" == "conc_maj", "maj", "coethnic")

		reghdfe cit_change c.`conc'##i.imm i.pnr_count ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			i.grade i.year_start i.birth_order ///
			if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(`conc') over(imm)

		marginsplot, ///
			title("") ytitle("") xtitle("", size(med) height(5)) ///
			graphregion(color(white)) scheme(plotplain) legend(size(small)) ///
			yline(0) ylab(-0.1 (0.05) 0.15) recast(scatter) xscale(range(1.5 3.5))
		graph export "Replicate_figS17_hetero_imm_`file_sfx'.png", width(4000) replace
	}



* Figure S18: Heterogenous effects, grade level
	* Marginal effect of each concentration measure by grade, drawn as a line
	* with dashed confidence bounds.
	foreach conc in conc_maj conc_enclave {

		* File suffix: maj for majority concentration, coethnic for co-ethnic concentration
		local file_sfx = cond("`conc'" == "conc_maj", "maj", "coethnic")

		reghdfe cit_change c.`conc'##i.grade i.pnr_count i.year_start i.birth_order ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(`conc') over(grade)

		marginsplot, ///
			title("") ytitle("Marginal effects", size(med) height(5)) xtitle("", size(med) height(5)) ///
			graphregion(color(white)) scheme(plotplain) legend(size(small)) ///
			xscale(range(0 (1) 9)) xlab(#10, angle(45)) yline(0) ///
			recast(line) recastci(rline) ciopts(lpattern("--")) ///
			yscale(range(-0.06(0.02) 0.14)) ylab(#14)
		graph export "Replicate_figS18_hetero_grade_`file_sfx'.png", width(4000) replace
	}



* Figure S19: Heterogenous effects, birth order
	* Collapse birth orders 8 through 12 into one category labelled 8-12
	recode birth_order(8/12=8 "8-12"), gen(birth_order_com)

	* The concentration measure is interacted with birth order and also with
	* pnr_count, grade and year_start; margins then reports its effect by birth order.
	foreach conc in conc_maj conc_enclave {

		* File suffix: maj for majority concentration, coethnic for co-ethnic concentration
		local file_sfx = cond("`conc'" == "conc_maj", "maj", "coethnic")

		reghdfe cit_change c.`conc'##i.birth_order_com c.`conc'##i.pnr_count ///
			$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
			c.`conc'##i.grade c.`conc'##i.year_start ///
			if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(`conc') over(birth_order_com)

		marginsplot, ///
			title("") ytitle("Marginal effects", size(med) height(5)) xtitle("", size(med) height(5)) ///
			graphregion(color(white)) scheme(plotplain) ///
			recast(line) recastci(rline) ciopts(lpattern("--")) ///
			legend(size(small)) yline(0) yscale(range(-0.05 (0.01) 0.02)) ylab(#12) ///
			xtitle("Birth order", height(8))
		graph export "Replicate_figS19_hetero_siborder_`file_sfx'.png", width(4000) replace
	}


	
* Figure S20: Heterogenous effects, time (majority concentration)
	* Majority concentration interacted with school-year dummies; the model is stored
	* as maj_overtime and its year-specific marginal effects are plotted.
	reghdfe cit_change c.conc_maj##i.year_start i.pnr_count i.grade i.birth_order ///
		$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
		if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	eststo maj_overtime

	margins, dydx(conc_maj) over(year_start)
	marginsplot, ///
		title("") ///
		ytitle("", size(med) height(5)) ///
		xtitle("", size(med) height(5)) ///
		graphregion(color(white)) scheme(plotplain) legend(size(small)) ///
		xscale(range(1991 (1) 2018)) xlab(#34, angle(45)) yline(0) ///
		recast(line) recastci(rline) ciopts(lpattern("--"))

	graph export "Replicate_figS20_hetero_time_maj.png", width(4000) replace
		 
		 

* Figures S21: Heterogenous effects, group size
	* group_large is 1 for the five listed opr_land codes, 0 for every other code,
	* and left missing where opr_land equals the system missing value.
	gen group_large = inlist(opr_land, 5172, 5486, 5436, 5404, 5289) if opr_land!=.

	* Effect of co-ethnic concentration separately for smaller and larger groups
	reghdfe cit_change c.conc_enclave##i.group_large i.pnr_count i.year_start i.grade i.birth_order ///
		$covariates_missing $covariates_parents_missing $covariates_cohort_missing ///
		if sample==1 & eu_scheng==0, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	margins, dydx(conc_enclave) over(group_large) post
	eststo group_large_small

	* coefplot without a model name plots the active (just posted) margins results
	coefplot, ///
		vertical yline(0) scheme(plotplain) graphregion(color(white)) legend(off) ///
		ci(95 90) ylabel(-0.05 (0.05) 0.15) mlabel format(%9.2g) mlabposition(2) ///
		coeflabel(0.group_large = "Smaller ethnic group" 1.group_large = "Larger ethnic group")
	graph export "Replicate_figS21_hetero_groupsize_coethnic.png", width(4000) replace 

	
	
* Figure S22: Heterogenous effects, majority share

	* Dummy within the analysis sample: 1 when conc_maj is at least 0.50, 0 when it is below.
	* NOTE(review): a missing conc_maj also satisfies the comparison and is coded 1;
	* confirm that conc_maj is never missing where sample==1.
			gen maj_split = (conc_maj>=0.50) if sample==1

	* Right-hand side shared by both panels
		local s22_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	* Panel A: co-ethnic effect by the binary majority split
		reghdfe cit_change c.conc_enclave##i.maj_split `s22_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(conc_enclave) over(maj_split) post
		eststo maj_split
		coefplot maj_split, ///
			vertical yline(0) scheme(plotplain) graphregion(color(white)) legend(size(small)) ///
			ci(95 90) xtitle("") mlabel format(%9.2g) ylab(-0.1 (0.05) 0.15) mlabposition(4) ///
			ytitle("", height(8)) ///
			coeflabel(0.maj_split = "Native Danes in minority" 1.maj_split = "Native Danes in majority") ///
			legend(off)
		graph export "Replicate_figS22_hetero_majshare_panelA_coethnic.png", width(4000) replace


	* Panel B: co-ethnic effect by the categories of conc_maj_cat
		reghdfe cit_change c.conc_enclave##i.conc_maj_cat `s22_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(conc_enclave) over(conc_maj_cat) post
		eststo maj_split_catvar
		coefplot maj_split_catvar, ///
			vertical yline(0) scheme(plotplain) graphregion(color(white)) legend(size(small)) ///
			ci(95 90) xtitle("") mlabel format(%9.2g) ylab(-0.1 (0.05) 0.40) mlabposition(4) ///
			ytitle("", height(8))  legend(off)
		graph export "Replicate_figS22_hetero_majshare_panelB_coethnic.png", width(4000) replace

		
		
* Figure S23: EU vs. non-EU background
	* Runs on the default frame: the analysis frame keeps only eu_scheng==0, so the
	* EU / non-EU interaction can only be estimated on the unrestricted data.
	frame change default

	* Majority concentration: effect for non-EU (main term) and EU (main term + interaction).
	* Each matrix stacks estimate, standard error and degrees of freedom in rows 1 to 3.
	reghdfe cit_change c.conc_maj##i.eu_scheng i.pnr_count  $covariates_missing $covariates_parents_missing $covariates_cohort_missing i.grade i.year_start i.birth_order if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		lincom c.conc_maj 
		mat m1_noneu=r(estimate)\r(se)\r(df)
		lincom c.conc_maj+1.eu_scheng#c.conc_maj 
		mat m1_eu=r(estimate)\r(se)\r(df)

	* Co-ethnic concentration: same two linear combinations
	reghdfe cit_change c.conc_enclave##i.eu_scheng i.pnr_count  $covariates_missing $covariates_parents_missing $covariates_cohort_missing i.grade i.year_start i.birth_order if sample==1, absorb(mor_idn#inst) keepsingletons cluster(cohort_id)
		lincom c.conc_enclave 
		mat m2_noneu=r(estimate)\r(se)\r(df)
		lincom c.conc_enclave+1.eu_scheng#c.conc_enclave 
		mat m2_eu=r(estimate)\r(se)\r(df)

	* One matrix per background group, one column per concentration measure.
	* Despite their names, majconc holds the non-EU results and enclaveconc the EU results.
	mat majconc=m1_noneu,m2_noneu
	mat enclaveconc=m1_eu,m2_eu
	mat colnames majconc = "Majority concentration" "Co-ethnic concentration"
	mat colnames enclaveconc = "Majority concentration" "Co-ethnic concentration"

	* Line continuation is used instead of a semicolon delimiter. The earlier version
	* switched the delimiter to ";" and never switched it back, so the graph export and
	* every later line were read as one unterminated command.
	coefplot ///
		(matrix(majconc), se(majconc[2]) df(majconc[3]) label("Non-EU country")) ///
		(matrix(enclaveconc), se(enclaveconc[2]) df(enclaveconc[3]) label("EU country")), ///
		vertical ylab(-0.15 (0.05) 0.15) yline(0) scheme(plotplain) ci(95 90) mlabel format(%9.2g) mlabposition(3) xline(1.5, lpattern(...))

	graph export "Replicate_figS23_hetero_eu_noneu.png", width(4000) replace

	* Return to the restricted analysis frame for the remaining figures
	frame change analysis



* Figure S24: Information dissemination 
	frame change analysis

	* Right-hand side shared by both alternative-exposure models
	local s24_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	* Exposure measured by conc_enclave_cit2
	reghdfe cit_change conc_enclave_cit2 `s24_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	eststo alt_x_3b_enclave

	* Exposure measured by conc_enclave_cit_grade
	reghdfe cit_change conc_enclave_cit_grade `s24_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	eststo alt_x_3b_enclave2 

	* Baseline next to the two alternative exposure measures
	coefplot ///
		(base2, label("Baseline")) ///
		(alt_x_3b_enclave2, label("Concentration of naturalized co-ethnics")) ///
		(alt_x_3b_enclave, label("Relative share of naturalized co-ethnics")), ///
		keep(conc_enclave conc_enclave_cit2 conc_enclave_cit_grade) ///
		vertical yline(0) scheme(plotplain) graphregion(color(white)) legend(size(small)) ///
		ci(95 90) xtitle("") xlabel("") ylabel(-0.05 (0.05) 0.15) ///
		mlabel format(%9.2g) mlabposition(2) name(g2, replace)

	graph export "Replicate_figS24_info_mechanism.png", width(4000) replace



* Figure S25: Ethnic composition and national identity
	* The data and code for replicating this figure are stored in another DST project (project 707835). In that project, the replication do-file can be found here: E:\workdata\707835\Mathias (workfolder)\pnas_replication\replication_pnas_citizenship_identity


	
* Figure S26: Heterogenous mechanism analyses, immigrants vs. descendants (co-ethnic concentration)
	* For the math and Danish outcomes in turn: interact co-ethnic concentration with imm
	* and store the posted marginal effects by imm category.
	local s26_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	foreach subj in math dan {
		reghdfe `subj'_std c.conc_enclave##i.imm `s26_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(conc_enclave) over(imm) post
		eststo enclave_`subj'_immdes
	}

	coefplot enclave_math_immdes enclave_dan_immdes, ///
		vertical yline(0) ylab(-0.3 (0.1) 0.3) scheme(plotplain) ci(95 90) ///
		mlabel format(%9.3g) mlabposition(3) xline(1.5, lpattern(...)) ///
		legend(order(3 "Math, std." 6 "Danish, std."))

	graph export "Replicate_figS26_imm_des_coethnic.png", width(4000) replace


	
* Figure S27: Heterogenous mechanism analyses, immigrants vs. descendants (majority concentration)
	* For the math and Danish outcomes in turn: interact majority concentration with imm
	* and store the posted marginal effects by imm category.
	local s27_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	foreach subj in math dan {
		reghdfe `subj'_std c.conc_maj##i.imm `s27_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
		margins, dydx(conc_maj) over(imm) post
		eststo maj_`subj'_immdes
	}

	coefplot maj_math_immdes maj_dan_immdes, ///
		vertical yline(0) ylab(-0.3 (0.1) 0.3) scheme(plotplain) ci(95 90) ///
		mlabel format(%9.3g) mlabposition(3) xline(1.5, lpattern(...)) ///
		legend(order(3 "Math, std." 6 "Danish, std.")) 

	graph export "Replicate_figS27_imm_des_maj.png", width(4000) replace



* Figure S28. Academic achievement and naturalization over time 

	* Compare only grades within the same type of exam (see overview in Table S13 in the SI)
	* The grade file is loaded into its own empty frame. Copying the analysis frame first
	* would only duplicate the full dataset in memory before use replaces it.
		frame create grades2
		frame grades2: use "E:\workdata\702992\702992\Mathias Kruse\Diversity - Childhood_Adulthood\Datasæt\Final_analyses_sample2.dta", clear

	* Pull the two same-course averages into the analysis frame by id_unique, then drop the helper frame
		frame change analysis
		frlink m:1 id_unique, frame(grades2)
		frget dan_avg_samecourse math_avg_samecourse, from(grades2)
		frame drop grades2

	* Standardize over all observations in the analysis frame
		egen math_std_samecourse = std(math_avg_samecourse)
		egen dan_std_samecourse = std(dan_avg_samecourse)

	* Estimation sample for both panels: grade 9, school years from 2001, non-EU background,
	* and either cit_9th_dum==1 or no citizenship change.
	* This is the same condition as the two or-joined conjunctions it replaces.
		local s28_if sample==1 & grade==9 & eu_scheng==0 & year_start>=2001 & (cit_9th_dum==1 | cit_change==0)
		local s28_rhs c.conc_enclave i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	* Panel A: Danish
		reghdfe cit_change c.dan_std_samecourse##i.year_start `s28_rhs' if `s28_if', absorb(mor_idn#inst) keepsingletons
		margins, dydx(dan_std_samecourse) over(year_start)
		marginsplot, scheme(plotplain) yline(0) title("") ytitle("") xtitle("") ylabel(-0.1 (0.05) 0.20)
		graph export "Replicate_figS28_timedependent_dan_nat.png", width(4000) replace


	* Panel B: Math
		reghdfe cit_change c.math_std_samecourse##i.year_start `s28_rhs' if `s28_if', absorb(mor_idn#inst) keepsingletons
		margins, dydx(math_std_samecourse) over(year_start)
		marginsplot, scheme(plotplain) yline(0) title("") ytitle("") xtitle("") ylabel(-0.1 (0.05) 0.20)
		graph export "Replicate_figS28_timedependent_math_nat.png", width(4000) replace	
			 
}		 



**********************

* Analyses in the Supporting Information (SI)
	// Codes for reproducing multivariate regression tables (Table S14 to Table S18)
	
**********************
	 
		 
if 1==1{
	

* Table S14 and Table S15: Linear and quadratic effect of ethnic majority concentration (S14) and co-ethnic concentration (S15)	

	* Building blocks of the right-hand side
		local s14_covars $covariates_missing $covariates_parents_missing $covariates_cohort_missing
		local s14_fixed i.pnr_count i.year_start i.grade i.birth_order_com

	* Three models per functional form and concentration measure:
	*   m1 bivariate, m2 covariates + family x school FE, m3 family x school FE without covariates.
	* Quadratic models carry a q after the model number (m1q, m2q, m3q).
		foreach form in linear quadratic {
			foreach conc in conc_maj conc_enclave {

				if "`form'" == "linear" {
					local term `conc'
					local sfx
				}
				else {
					local term c.`conc'##c.`conc'
					local sfx q
				}

				* Bivariate
				reg cit_change `term' if sample==1, vce(cluster cohort_id)
				eststo m1`sfx'_`conc'

				* Covariates + Time FE + Family FE x School FE
				reghdfe cit_change `term' `s14_covars' `s14_fixed' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
				eststo m2`sfx'_`conc'

				* No covariates + Time FE + Family FE x School FE
				reghdfe cit_change `term' `s14_fixed' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
				eststo m3`sfx'_`conc'
			}
		}

	* Table S14: majority concentration
	esttab m1_conc_maj m2_conc_maj m3_conc_maj m1q_conc_maj m2q_conc_maj m3q_conc_maj ///
		using "Replicate_tabS14_lin_quad_full_maj.rtf", b(3) r(3) se(3) ///
		mtitle("Model 1. Bivariate" " Model 2. School x Family FE" "Model 3. School x Family FE [no covariates]" "Model 4. Bivariate" " Model 5. School x Family FE" "Model 6. School x Family FE [no covariates]") ///
		nogaps nolines replace 

	* Table S15: co-ethnic concentration
	esttab m1_conc_enclave m2_conc_enclave m3_conc_enclave m1q_conc_enclave m2q_conc_enclave m3q_conc_enclave ///
		using "Replicate_tabS15_lin_quad_full_coethnic.rtf", b(3) r(3) se(3) ///
		mtitle("Model 1. Bivariate" " Model 2. School x Family FE" "Model 3. School x Family FE [no covariates]" "Model 4. Bivariate" " Model 5. School x Family FE" "Model 6. School x Family FE [no covariates]") ///
		nogaps nolines replace 


	
	
* Table S16: Skills as mechanism
	* Right-hand side shared by all concentration-on-test-score models
	local s16_rhs i.pnr_count i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	* Part a: default frame, rows with eu_scheng==0 that are flagged by sample or sample_dum2.
	* Estimation order: majority on math, majority on Danish, co-ethnic on math, co-ethnic on Danish.
	frame change default 

	foreach conc in maj enclave {
		foreach subj in math dan {
			reghdfe `subj'_std c.conc_`conc' `s16_rhs' if (sample==1 | sample_dum2==1) & eu_scheng==0, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
			eststo `conc'_`subj'_full_noneu
		}
	}

	esttab /// 
	base base2 /// Baseline estimates 
	maj_math_full_noneu maj_dan_full_noneu enclave_math_full_noneu enclave_dan_full_noneu /// X => M, full sample
	using "Replicate_tabS16_skills_full_a.rtf", b(3) r(3) se(3) mtitle("Baseline, majority concentration" "Baseline, co-ethnic concentration" "Majority concentration on math scores (std.)" "Majority concentration on Danish scores (std.)" "Co-ethnic concentration on math scores (std.)" "Co-ethnic concentration on Danish scores (std.)") label nogaps nolines replace 	


	* Part b: analysis frame
	frame change analysis	

	* Concentration on test scores, same four models on the main sample
	foreach conc in maj enclave {
		foreach subj in math dan {
			reghdfe `subj'_std c.conc_`conc' `s16_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
			eststo `conc'_`subj'_noneu
		}
	}

	* Test scores on naturalization: grade 9, school years from 2001, and either
	* cit_9th_dum==1 or no citizenship change
	local s16_g9 sample==1 & grade==9 & year_start>=2001 & (cit_9th_dum==1 | cit_change==0)
	local s16_my_rhs c.conc_enclave i.year_start i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	reg cit_change math_std if `s16_g9', vce(robust)
	eststo math_biv 

	reghdfe cit_change math_std `s16_my_rhs' if `s16_g9', absorb(mor_idn#inst) keepsingletons
	eststo math_familyxschool

	reg cit_change dan_std if `s16_g9', vce(robust)
	eststo dan_std

	* In this model only, the no-citizenship-change branch also requires eu_scheng==0
	reghdfe cit_change dan_std `s16_my_rhs' if sample==1 & grade==9 & year_start>=2001 & (cit_9th_dum==1 | (cit_change==0 & eu_scheng==0)), absorb(mor_idn#inst) keepsingletons
	eststo dan_familyxschool

esttab /// 
	base base2 /// Baseline estimates 
	maj_math_noneu maj_dan_noneu enclave_math_noneu enclave_dan_noneu /// X => M, main sample
	math_biv math_familyxschool dan_std dan_familyxschool /// M => Y
	using "Replicate_tabS16_skills_full_b.rtf", b(3) r(3) se(3) mtitle("Baseline, majority concentration" "Baseline, co-ethnic concentration" "Majority concentration on math scores (std.)" "Majority concentration on Danish scores (std.)" "Co-ethnic concentration on math scores (std.)" "Co-ethnic concentration on Danish scores (std.)" "Math scores (std.) on naturalization" "Math scores (std.) on naturalization" "Danish scores (std.) on naturalization" "Danish scores (std.) on naturalization") label nogaps nolines replace 	
	
	
	
* Table S17: Parents' education and academic achievement 
	* For each test-score outcome: regress it on the mother's and then the father's education
	* categories, first bivariately and then absorbing mor_idn.
	foreach score in math_std dan_std {

		* Bivariate
			foreach par in m f {
				reg `score' i.`par'_edu_mis if sample==1, vce(robust)
				eststo `score'_`par'_edu
			}

		* Including mother FE
			foreach par in m f {
				reghdfe `score' i.`par'_edu_mis if sample==1, absorb(mor_idn) keepsingletons
				eststo `score'_`par'_edu_fe
			}
	}

	* Wildcards pick up the eight models stored above; the titles are listed mother models first
	esttab *_m_edu* *_f_edu* using "Replicate_tabS17_education.rtf", ///
		se(3) b(3) r(3) ///
		mtitle("Math, mother" "Math, mother (FE)" "Danish, mother" "Danish, mother (FE)" "Math, father" "Math, father (FE)" "Danish, father" "Danish, father (FE)") ///
		replace 

	

* Table S18: Marginal effect of ethnic composition on naturalization over time 
	* Each concentration measure is interacted with the school-year dummies;
	* both models share the remaining right-hand side.
	local s18_rhs i.pnr_count i.grade i.birth_order $covariates_missing $covariates_parents_missing $covariates_cohort_missing

	* Co-ethnic concentration
	reghdfe cit_change c.conc_enclave##i.year_start `s18_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	eststo coethnic_overtime

	* Majority concentration
	reghdfe cit_change c.conc_maj##i.year_start `s18_rhs' if sample==1, absorb(mor_idn#inst) keepsingletons vce(cluster cohort_id)
	eststo maj_overtime

	esttab maj_overtime coethnic_overtime using "Replicate_tabS18_over_time.rtf", ///
		b(3) r(3) se(3) ///
		mtitle("Ethnic majority concentration" "Co-ethnic concentration") ///
		label nogaps nolines replace 
  
}


